In [ ]:
!pip install boto3
Collecting boto3 Downloading boto3-1.34.68-py3-none-any.whl.metadata (6.6 kB) Collecting botocore<1.35.0,>=1.34.68 (from boto3) Downloading botocore-1.34.68-py3-none-any.whl.metadata (5.7 kB) Collecting jmespath<2.0.0,>=0.7.1 (from boto3) Using cached jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB) Collecting s3transfer<0.11.0,>=0.10.0 (from boto3) Using cached s3transfer-0.10.1-py3-none-any.whl.metadata (1.7 kB) Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in ./.venv/lib/python3.10/site-packages (from botocore<1.35.0,>=1.34.68->boto3) (2.9.0.post0) Collecting urllib3!=2.2.0,<3,>=1.25.4 (from botocore<1.35.0,>=1.34.68->boto3) Using cached urllib3-2.2.1-py3-none-any.whl.metadata (6.4 kB) Requirement already satisfied: six>=1.5 in ./.venv/lib/python3.10/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.35.0,>=1.34.68->boto3) (1.16.0) Downloading boto3-1.34.68-py3-none-any.whl (139 kB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 139.3/139.3 kB 3.5 MB/s eta 0:00:00 MB/s eta 0:00:01 Downloading botocore-1.34.68-py3-none-any.whl (12.0 MB) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.0/12.0 MB 5.0 MB/s eta 0:00:00m eta 0:00:010:01:01 Using cached jmespath-1.0.1-py3-none-any.whl (20 kB) Using cached s3transfer-0.10.1-py3-none-any.whl (82 kB) Using cached urllib3-2.2.1-py3-none-any.whl (121 kB) Installing collected packages: urllib3, jmespath, botocore, s3transfer, boto3 Successfully installed boto3-1.34.68 botocore-1.34.68 jmespath-1.0.1 s3transfer-0.10.1 urllib3-2.2.1
In [ ]:
import boto3
import base64
import json
# Module-level AWS clients shared by the cells below.
# The S3 client is created with the default session settings.
s3 = boto3.client(service_name="s3")
# The Bedrock runtime client is pinned to us-west-2; ocr_using_bedrock()
# uses it to invoke the model.
bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-west-2",
)
In [ ]:
# "anthropic_version": "bedrock-2023-05-31",
# "max_tokens": 4096,
# "top_k": 250,
# "top_p": 0.999,
# "temperature": 0,
def _detect_image_media_type(image_content, default="image/png"):
    """Return the MIME type implied by the leading bytes of *image_content*.

    Only the PNG, JPEG, GIF and WebP signatures checked below are
    recognised; any other content yields *default*.
    """
    if image_content.startswith(b"\x89PNG\r\n\x1a\n"):
        return "image/png"
    if image_content.startswith(b"\xff\xd8\xff"):
        return "image/jpeg"
    if image_content.startswith((b"GIF87a", b"GIF89a")):
        return "image/gif"
    # WebP is a RIFF container whose form type (bytes 8-11) is "WEBP".
    if image_content[:4] == b"RIFF" and image_content[8:12] == b"WEBP":
        return "image/webp"
    return default


def ocr_using_bedrock(file_path):
    """Extract the text in an image by asking a Claude 3 model on Bedrock.

    The file is read as bytes, base64-encoded and sent together with a
    Korean prompt through the module-level ``bedrock_runtime`` client.

    Args:
        file_path: Path of the image file to read.

    Returns:
        The ``text`` field of the first content item in the model response.

    Raises:
        OSError: If the file cannot be opened or read.
        Any exception raised by ``bedrock_runtime.invoke_model`` propagates
        unchanged.
    """
    # Read the image file.
    with open(file_path, 'rb') as image_file:
        image_content = image_file.read()

    # Encode the image as base64 for the JSON request body.
    base64_encoded_image = base64.b64encode(image_content).decode('utf-8')

    # Declare the real image type instead of always claiming PNG, so that
    # JPEG/GIF/WebP files are described correctly in the request.
    media_type = _detect_image_media_type(image_content)

    # Prompt (Korean): "Extract the characters in the image and write them
    # in the format below / The text in the image is as follows: /
    # {extracted characters}". The braces are literal, not a format field.
    prompt = (
        "\n"
        "이미지에 있는 글자를 추출해서 아래 포맷으로 작성해줘\n"
        "이미지에 있는 텍스트는 다음과 같습니다:\n"
        "{추출글자}\n"
    )

    # NOTE(review): temperature is 1, so repeated runs on the same image may
    # return different text — confirm this is intended for OCR.
    request_body = {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 4096,
        "top_k": 250,
        "top_p": 0.999,
        "temperature": 1,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": media_type,
                            "data": base64_encoded_image,
                        },
                    },
                    {
                        "type": "text",
                        "text": prompt,
                    },
                ],
            }
        ],
    }

    # invoke_model takes the request body as serialized JSON bytes.
    response = bedrock_runtime.invoke_model(
        body=json.dumps(request_body).encode('utf-8'),
        contentType="application/json",
        accept="application/json",
        modelId="anthropic.claude-3-sonnet-20240229-v1:0",
    )

    # The response body is a stream holding a JSON document.
    response_body = json.loads(response['body'].read())
    return response_body['content'][0]['text']
In [ ]:
!pip install easyocr
!pip install numpy
!pip install pretty
In [ ]:
import io
import boto3
import easyocr
import numpy as np
import base64
In [ ]:
from PIL import Image
In [ ]:
from pprint import pprint
In [ ]:
# Readers already built, keyed by their language tuple. Constructing a
# Reader loads the models, so each one is built once and reused.
_EASYOCR_READERS = {}


def _get_easyocr_reader(languages):
    """Return the cached ``easyocr.Reader`` for *languages*, building it once."""
    key = tuple(languages)
    reader = _EASYOCR_READERS.get(key)
    if reader is None:
        reader = easyocr.Reader(
            list(key),
            model_storage_directory='./',
            user_network_directory='./',
            download_enabled=True,
            gpu=False,
        )
        _EASYOCR_READERS[key] = reader
    return reader


def ocr_using_easyocr(file_path):
    """Run EasyOCR (English + Korean, CPU) on an image file.

    Args:
        file_path: Path of the image file to read.

    Returns:
        A dict with two keys:
        ``'DetectedText'`` is every detected text fragment joined by a
        single space, in the order EasyOCR returned them.
        ``'DetectedResults'`` is a list with one dict per fragment, holding
        ``"Text"`` and the ``"TopLeft"``, ``"TopRight"``, ``"BottomRight"``
        and ``"BottomLeft"`` corners as ``{"x": int, "y": int}``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Read the image file and decode it into a numpy array for EasyOCR.
    with open(file_path, 'rb') as image_file:
        image_content = image_file.read()
    image = Image.open(io.BytesIO(image_content))
    image_np = np.array(image)

    target_languages = ["en", "ko"]
    reader = _get_easyocr_reader(target_languages)

    # One pass only: the original ran readtext twice and discarded the
    # first result.
    results = reader.readtext(image_np)

    corner_names = ("TopLeft", "TopRight", "BottomRight", "BottomLeft")
    positions = []
    for result in results:
        # The code reads result[0] as the four corner points and result[1]
        # as the recognised text.
        box, text = result[0], result[1]
        entry = {"Text": text}
        for corner_name, corner in zip(corner_names, box):
            entry[corner_name] = {"x": int(corner[0]), "y": int(corner[1])}
        positions.append(entry)

    return {
        'DetectedText': ' '.join(result[1] for result in results),
        'DetectedResults': positions,
    }
In [ ]:
import IPython.display
import os
# Compare both OCR approaches on every image file found in dir_path.
dir_path = 'img'
# Suffixes treated as images; everything else in the directory is skipped.
_IMAGE_SUFFIXES = {'.png', '.jpg', '.jpeg', '.gif', '.webp'}

# Sorted so the comparison order is the same on every run.
for item in sorted(os.listdir(dir_path)):
    file_path = os.path.join(dir_path, item)
    # Skip sub-directories and non-image files instead of failing on them.
    if not os.path.isfile(file_path):
        continue
    if os.path.splitext(item)[1].lower() not in _IMAGE_SUFFIXES:
        continue

    display(IPython.display.Image(filename=file_path))

    bedrock_result = ocr_using_bedrock(file_path)
    print("# BedLock - Claud3")
    print(bedrock_result)
    print("")

    print("# EasyOCR")
    easyocr_result = ocr_using_easyocr(file_path)
    pprint(easyocr_result['DetectedText'])
Using CPU. Note: This module is much faster with a GPU.
# BedLock - Claud3 이미지에 있는 텍스트는 다음과 같습니다: 동원몬행 이 행 352-1098-553 개인 정보 보호를 위해 일부 정보를 가렸습니다. # EasyOCR '농협은행 이 형 352-1098- 5531'
Using CPU. Note: This module is much faster with a GPU.
# BedLock - Claud3 이미지에 있는 텍스트는 다음과 같습니다: 안녕하세요 근무중이라 서옵푬매하실분만 가족ID :Vg 3 # EasyOCR '안녕하세요 근무중이라 IO 서요구매하실분 만 카특ID :vg 8 다"다-터스 -d'
Using CPU. Note: This module is much faster with a GPU.
# BedLock - Claud3 이미지에 있는 텍스트는 다음과 같습니다: 4억원현행 176882-51-63 과, 천 # EasyOCR '넓권온 행 176882-51- 63 과 권'
Using CPU. Note: This module is much faster with a GPU.
# BedLock - Claud3 이미지에 있는 텍스트는 다음과 같습니다: 휴먼(주) LIU ONG LIE 352-1307--53 # EasyOCR '눈파 (주> LIU ONG LIE 352 연 1367 - -53'
Using CPU. Note: This module is much faster with a GPU.
# BedLock - Claud3 이미지에 있는 텍스트는 다음과 같습니다: 2022년 1월 18일 오후 12:44 내 계좌 농협: 352 1098 53 # EasyOCR '< 메모 2022년 1월 18일 오후 12.44 내 계좌 농협:352 1098 53'
Using CPU. Note: This module is much faster with a GPU.
# BedLock - Claud3 이미지에 있는 텍스트는 다음과 같습니다: 안녕하세요 일종이라서요 구매하실분만 카톡아이디: kc 3 상담문의주세요 # EasyOCR '안녕하세요 일중이라서요 구매하실분만 카특아이디: kc 8 상담문의주세요 @하> N N LG'
In [ ]: